Setup

library(tidyverse)
library(pals)
library(tidygraph)
library(igraph)
library(qgraph)
library(ggraph)
library(ggnewscale)
library(ggpubr)

knitr::opts_chunk$set(python.reticulate = FALSE)
setwd('~/repos/global-resistome2/')

# Load various custom functions used in this notebook
source('src/network_functions.R')

# Load lists of variables used for plotting and other tasks.
source('src/corr_defaults.R')

Load data

Host label to host groups

count.data <- read_csv('data/resistome_uc90_v2.csv')
## New names:
## * `` -> ...1
## Rows: 2652971 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): run_accession, refSequence
## dbl (3): ...1, clust_number, fragmentCountAln
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# query for meta data
# select * from Meta_public;
meta.data <- read_tsv('data/metadata.tsv', na = c("NULL", "")) %>%
  mutate(
    host = case_when(
      is.na(host) ~ 'Mising',
      TRUE ~ host
    )
  ) %>%
  left_join(enframe(host2group), by=c("host" = "name")) %>%
  rename("hostGroup" = "value")
## New names:
## * `` -> ...1
## Rows: 214022 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): run_accession, host, instrument_platform, country, continent
## dbl (7): ...1, year, latitude, longitude, raw_reads, raw_bases, trimmed_frag...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
all.data <-   meta.data  %>% 
  inner_join(count.data, by=c("run_accession")) %>%
  select(c(run_accession, host, hostGroup, year, latitude, longitude, country, instrument_platform, raw_reads, refSequence, fragmentCountAln, trimmed_fragments))
host.gene.counts <- all.data %>%
  group_by(hostGroup, refSequence) %>%
  summarise(fragmentCountAln=sum(fragmentCountAln))
## `summarise()` has grouped output by 'hostGroup'. You can override using the `.groups` argument.
all.gene.counts <- all.data %>%
  group_by(refSequence) %>%
  summarise(fragmentCountAln=sum(fragmentCountAln)) %>%
  mutate(hostGroup = 'all') %>%
  select(hostGroup, refSequence, fragmentCountAln)

trimmedFrag.hosts <- rbind(
  all.data %>%
  distinct(hostGroup, run_accession, host, trimmed_fragments) %>%
  group_by(hostGroup) %>%
  summarise(trimmed_fragments=sum(trimmed_fragments)), 
  all.data %>%
  distinct(hostGroup, run_accession, host, trimmed_fragments) %>%
  summarise(trimmed_fragments=sum(trimmed_fragments)) %>%
  mutate(hostGroup='all')) %>%
  filter(!is.na(hostGroup))

gene.counts <- rbind(all.gene.counts, host.gene.counts) %>%
  left_join(trimmedFrag.hosts, by=c("hostGroup")) %>%
  mutate(FPKM = fragmentCountAln / (trimmed_fragments / 1e6))

Make network files

resClasses <- get_resClasses()
## Loading required package: RMariaDB
makeFiles <- T
if (makeFiles) {
  corrFiles <- list.files(path='data/res_minF50_minS10', pattern=glob2rx("*_corr.npy"), full.names = T)
  pvalFiles <- list.files(path='data/res_minF50_minS10', pattern=glob2rx("*_pval.npy"), full.names = T)
  colFiles <- list.files(path='data/res_minF50_minS10', pattern=glob2rx("*_columns.list"), full.names = T)
  
  assertthat::assert_that(all(length(corrFiles) > 0,c(length(corrFiles) == length(pvalFiles), length(corrFiles) == length(colFiles), length(pvalFiles) == length(colFiles))))
  alpha = 0.01
  min_corr = 0.6
  
  all.matrices <- list()
  for (i in 1:length(corrFiles)) {
    mat <- dataLoader(
        corrFile=corrFiles[i],
        pvalFile=pvalFiles[i],
        colFile=colFiles[i]
    )
    
    host <- get_host(str_replace(corrFiles[i], '_sparr_corr.npy', ''))
    all.matrices <- c(all.matrices, setNames(list(mat), host))
    
    prefix = str_replace(basename(corrFiles[i]), '_sparr_corr.npy', '')
    graphFile = file.path('output/networks_sparcc_frag50_minn10', paste0(prefix, '_', min_corr))
    print(paste("Writing network json file to", graphFile))
    graph <- make_net(mat, resClasses, alpha, min_corr, graphFile)
  }
}
## [1] "Writing network json file to output/networks_sparcc_frag50_minn10/sparcc_air_0.6"
## [1] "Writing network json file to output/networks_sparcc_frag50_minn10/sparcc_all_0.6"
## [1] "Writing network json file to output/networks_sparcc_frag50_minn10/sparcc_canis_lupus_0.6"
## [1] "Writing network json file to output/networks_sparcc_frag50_minn10/sparcc_chicken_0.6"
## [1] "Writing network json file to output/networks_sparcc_frag50_minn10/sparcc_cow_0.6"
## [1] "Writing network json file to output/networks_sparcc_frag50_minn10/sparcc_freshwater_0.6"
## [1] "Writing network json file to output/networks_sparcc_frag50_minn10/sparcc_homo_sapiens_0.6"
## [1] "Writing network json file to output/networks_sparcc_frag50_minn10/sparcc_marine_0.6"
## [1] "Writing network json file to output/networks_sparcc_frag50_minn10/sparcc_mouse_0.6"
## [1] "Writing network json file to output/networks_sparcc_frag50_minn10/sparcc_pig_0.6"
## [1] "Writing network json file to output/networks_sparcc_frag50_minn10/sparcc_soil_0.6"
# Grab names for all the different network files and load them
netFiles <- list.files(path='/Users/hanmar/repos/global-resistome2/output/networks_sparcc_frag50_minn10', pattern = glob2rx('sparcc_*_0.6.json'), full.names = T)

networks <- list()
for (netFile in netFiles) {
  host <- get_host(netFile)
  if (!host %in% hosts) {
    next
  }
  
  # load graph file
  G <- importGraph(netFile) %>%
    activate(edges) %>%
    mutate(weight=corr) %>%
    select(-c(Var1_class, Var2_class)) %>%
    activate(nodes) %>%
    select(-ResFinder_class) %>%
    left_join(resClasses, by=c("name")) 
  
  networks <- c(networks, setNames(list(G), host))
}

Main figures and tables

Table 1: Grouped labels for host and environments

rbind(
  meta.data %>% group_by(hostGroup) %>% summarise(`Number of samples`=n()),
  meta.data %>% summarise(`Number of samples`=n()) %>% mutate(hostGroup = 'all')
) %>% left_join(
  rbind(
    all.data %>% group_by(hostGroup) %>% summarise(`Number of samples with ARGs`=n_distinct(run_accession)), 
    all.data %>% summarise(`Number of samples with ARGs`=n_distinct(run_accession)) %>% mutate(hostGroup = 'all')
  ),
  by='hostGroup'
) %>%
  mutate(
    hostGroup = str_to_title(case_when(
      hostGroup == 'canis_lupus' ~ 'Dog',
      hostGroup == 'homo_sapiens' ~ 'Human',
      TRUE ~ hostGroup
  )
  ))
## # A tibble: 12 × 3
##    hostGroup  `Number of samples` `Number of samples with ARGs`
##    <chr>                    <int>                         <int>
##  1 Air                        914                           870
##  2 Dog                       3439                          3182
##  3 Chicken                   1223                          1215
##  4 Cow                        886                           824
##  5 Freshwater                4494                           585
##  6 Human                    95003                         57239
##  7 Marine                   30002                          5444
##  8 Mouse                     3947                          3909
##  9 Pig                       3229                          3171
## 10 Soil                      6533                          2822
## 11 <NA>                     64352                         39945
## 12 All                     214022                        119206

Table 2: Relative abundance of read fragments

get_host_order <- function(x){return(which(x == hosts))}

sum.gene.counts <- gene.counts %>%
  filter(!is.na(hostGroup)) %>%
  left_join(resClasses, by=c("refSequence"="name")) %>%
  group_by(hostGroup, ResFinder_class) %>%
  summarise(fragmentCountAln=sum(fragmentCountAln), nGeneClass = n_distinct(refSequence)) #%>%
  #pivot_wider(id_cols=hostGroup, names_from=ResFinder_class, values_from=fragmentCountAln) 


sum.gene.counts2 <- sum.gene.counts %>%
  group_by(hostGroup) %>%
  summarise(sumVar=sum(fragmentCountAln), nGene=sum(nGeneClass)) %>%
  left_join(sum.gene.counts, by=c("hostGroup")) %>%
  mutate(
    fragClosed = (100 / sumVar) * fragmentCountAln,
    hostOrder = mapply(get_host_order, hostGroup),
    hostGroup2 = str_to_title(case_when(
      hostGroup == 'canis_lupus' ~ 'Dog',
      hostGroup == 'homo_sapiens' ~ 'Human',
      TRUE ~ hostGroup))
  )


resClasses %>% 
  filter(name %in% unique(count.data$refSequence)) %>%
  group_by(ResFinder_class) %>% 
  summarise(n=n()) %>%
  mutate(label=paste0(ResFinder_class, ' (', n, ')')) %>%
  right_join(sum.gene.counts2, by=c('ResFinder_class' = 'ResFinder_class')) %>%
  pivot_wider(id_cols='label', names_from='hostGroup2', values_from='fragClosed') %>%
  write_tsv(file='figs/table_class_rel_abundances.tsv')

Figure 1: Overview of networks

For Figure 1a: Each network is visualized by using the Fruchterman-Reingold layout and save as a pdf/png/tiff.

source.network.plots <- list()
all.network <- NULL
for (host in names(networks)) {

  G <- networks[[host]]  

  if(host == 'homo_sapiens') {
    host = 'human'
  } else if (host == 'canis_lupus') {
    host = 'dog'
  }
  plottitle <- str_to_title(str_replace(host, '_', ' '))
  
  
  # define layout
  e <- get.edgelist(G, names=F)
  layout <- qgraph.layout.fruchtermanreingold(e, vcount = vcount(G),
                                              area = 6*(vcount(G)^2), repulse.rad = vcount(G)^2.5)

  #  make visualizations with and without legends
  graph.plot.leg <- ggraph(G, layout="manual", x=layout[,1], y=layout[,2]) +
    geom_edge_link0(aes(color=corr, width=corr, alpha=corr)) +
    geom_node_point(aes(color=ResFinder_class)) +
    scale_edge_color_viridis("Correlation",direction=-1, limits=c(0.6,1)) +
    scale_edge_width(range=c(.2, 1), guide="none") +
    scale_edge_alpha(range=c(0.5, 0.9), guide="none") +
    scale_color_manual("ResFinder class", values=classes_colors) +
    # scale_shape_manual("Classified\nas CIA", values=c('0' = 15, '1'=17)) +
    ggtitle(plottitle) +
    theme_graph(base_family="sans")
  
  ggsave(plot=graph.plot.leg, filename=paste0('figs/network_', host, '.png'), width=15, height=10)



  graph.plot.noleg <- ggraph(G, layout="manual", x=layout[,1], y=layout[,2]) +
    geom_edge_link0(aes(color=corr, width=corr, alpha=corr)) +
    geom_node_point(aes(color=ResFinder_class)) +
    scale_edge_color_viridis(direction=-1, guide="none", limits=c(0.6, 1)) +
    scale_edge_width(range=c(.1, 1), guide="none") +
    scale_edge_alpha(range=c(0.5, 0.9), guide="none") +
    scale_color_manual(values=classes_colors, guide="none") +
    ggtitle(plottitle) +
    theme_graph(title_size = 14, base_family="sans")
  ggsave(plot=graph.plot.noleg, filename=paste0('figs/network_', host, '_noleg.png'), width=10, height=10)
  
  graph.plot.noleg.cia <- ggraph(G, layout="manual", x=layout[,1], y=layout[,2]) +
    geom_edge_link0(aes(color=corr, width=corr, alpha=corr)) +
    geom_node_point(aes(color=ResFinder_class)) +
    scale_edge_color_viridis(direction=-1, limits=c(0.6,1), guide="none") +
    scale_edge_width(range=c(.2, 1), guide="none") +
    scale_edge_alpha(range=c(0.5, 0.9), guide="none") +
    scale_color_manual(values=classes_colors, guide="none") +
    # scale_shape_manual(values=c('0' = 15, '1'=17), guide="none") +
    ggtitle(plottitle) +
    theme_graph(base_family="sans")
  ggsave(plot=graph.plot.noleg.cia, filename=paste0('figs/network_', host, '_noleg_cia.png'), width=10, height=10)

  
  # with arg names on it
  graph.plot.anno <- G %>%
    activate(nodes) %>%
    mutate(name2 = sub('_[^_]+$', '', name)) %>%
   ggraph(layout="manual", x=layout[,1], y=layout[,2]) +
    geom_edge_link0(aes(color=corr, width=corr, alpha=corr)) +
    geom_node_point(aes(color=ResFinder_class)) +
    geom_node_text(aes(label=name2), size=2, repel = T, check_overlap = T) +
    scale_edge_color_viridis("Correlation",direction=-1, limits=c(0.6,1)) +
    scale_edge_width(range=c(.2, 1), guide="none") +
    scale_edge_alpha(range=c(0.7, 0.9), guide="none") +
    scale_color_manual("ResFinder class", values=classes_colors) +
    ggtitle(plottitle) +
    theme_graph(title_size = 14, base_family="sans")
  ggsave(plot=graph.plot.anno, filename=paste0('figs/network_', host, '_anno.png'), width=15, height=10)

  if(host == 'all') {
    all.network <- graph.plot.leg
  } else {
    source.network.plots <- c(source.network.plots, setNames(list(graph.plot.noleg.cia), host))
  }
}

p.sources <- ggarrange(plotlist=source.network.plots[order(names(source.network.plots))], nrow=2, ncol=ceiling(length(source.network.plots)/2), common.legend = T)
p.combined <- ggarrange(all.network, p.sources, ncol=2, common.legend = T, legend='bottom', widths=c(.5, 1), labels='a')
ggsave(plot=p.combined, filename='figs/fig1a_network_combined.png', width=22, height=8)
ggsave(plot=p.combined, filename='figs/fig1a_network_combined.pdf', width=22, height=8)
ggsave(plot=p.combined, filename='figs/fig1a_network_combined.tiff', width=22, height=8)

For Figure 1b: Summarise global properties of the different networks

network.characteristics <- tibble()
for (host in names(networks)) {
  
  G <- networks[[host]] #importGraph(netFile)
  
  net.characteristics <- summarise.graph(G) %>%
    mutate(host=host, sort_order=which(hosts == host))
  network.characteristics <- rbind(network.characteristics, net.characteristics)
  
}

network.characteristics <- network.characteristics %>%
  mutate(
    host = str_to_title(case_when(
      host == 'canis_lupus' ~ 'Dog',
      host == 'homo_sapiens' ~ 'Human',
      TRUE ~ host
    ))
)
      
network.characteristics.final <- network.characteristics %>%
  mutate(
    global_clustering_coefficient = round(global_clustering_coefficient, 3),
    edge_density = round(edge_density, 3),
    network_density = round(network_density,3)
    ) %>%
  arrange(sort_order) %>%
  select(host, number_of_nodes, number_of_edges, global_clustering_coefficient, network_density, edge_density, number_of_components)

(p.netchars <- ggtexttable(
  network.characteristics.final, 
  cols = c(
    str_wrap('Network', width=7),
    str_wrap('No. nodes', width=7),
    str_wrap('No. edges', width=7),
    str_wrap('Global clustering coefficient', width=17),
    str_wrap('Network density', width=7),
    str_wrap('Edge density', width=7),
    str_wrap('No. components', width=7)
  ),
  rows=rep('', nrow(network.characteristics.final)),
  theme = ttheme(base_style="light", padding=unit(c(2,2), "mm"))
))

For Figure 1c: Distribution of correlation coefficients / weights

all.edges <- tibble()
for (host in names(networks)) {
  
  G <- networks[[host]] 
  
  host_order = which(host == hosts)
  
  edge.data <- G %>%
    as_data_frame %>%
    select(from, to, corr) %>%
    mutate(host=host, host_order=host_order)
  all.edges <- rbind(all.edges, edge.data)
}


all.edges <- all.edges %>%
  mutate(
    host = str_to_title(case_when(
      host == 'canis_lupus' ~ 'Dog',
      host == 'homo_sapiens' ~ 'Human',
      TRUE ~ host
    ))
  )

(p.edge.dist <- ggplot(all.edges) +
  geom_histogram(aes(x=corr), color='white', binwidth = 0.01) +
  facet_grid(fct_reorder(host, host_order)~., scales='free_y') +
  xlab('Correlation') + ylab('Count') + 
  #theme_pubr() 
  theme_light() +
  theme(axis.text.y = element_text(size=8)) 
)

For figure 1d:Overlapping nodes and edges in the networks

overlapping.edges <- matrix(nrow=length(networks), ncol=length(networks))
overlapping.nodes <- matrix(nrow=length(networks), ncol=length(networks))

colnames(overlapping.edges) <- rownames(overlapping.edges) <- names(networks)
colnames(overlapping.nodes) <- rownames(overlapping.nodes) <- names(networks)

for (hostSet in utils::combn(names(networks), m=2, simplify = F)) {
  h1 = hostSet[1]
  h2 = hostSet[2]
  
  G1 <- networks[[h1]]
  G2 <- networks[[h2]]
  
  if (any(is.null(G1), is.null(G2))) {
    next
  }
  
  overlapping.edges[h1, h2] <- length(intersect(E(G1), E(G2)))
  overlapping.nodes[h1, h2] <- length( intersect(V(G1), V(G2)))

}

overlapping.edges <- as_tibble(overlapping.edges,rownames = 'h1') %>%
  pivot_longer(-h1, names_to = 'h2')
overlapping.nodes <- as_tibble(overlapping.nodes,rownames = 'h1') %>%
  pivot_longer(-h1, names_to = 'h2') 

(p.overlaps <- ggplot() +
  geom_tile(data=overlapping.nodes, aes(x=h1, y=h2, fill=value)) +
  geom_text(data=overlapping.nodes, aes(x=h1, y=h2, label=value), color='white') +
  scale_fill_viridis_c(str_wrap("Overlapping nodes", 11), na.value=NA) +
  new_scale_fill() +
  geom_tile(data=overlapping.edges, aes(y=h1, x=h2, fill=value)) +
  geom_text(data=overlapping.edges, aes(y=h1, x=h2, label=value), color='white') +
  scale_fill_viridis_c(str_wrap("Overlapping edges", 11), na.value=NA, option='H') +
  scale_x_discrete(labels=c('Air', 'All', 'Dog', 'Chicken', 'Cow', 'Freshwater', 'Human', 'Marine', 'Mouse', 'Pig', 'Soil')) +
  scale_y_discrete(labels=c('Air', 'All', 'Dog', 'Chicken', 'Cow', 'Freshwater', 'Human', 'Marine', 'Mouse', 'Pig', 'Soil')) +
  theme_classic() +
  theme(
    axis.title = element_blank(),
    legend.margin=margin(0,0,0,0)
  )
)

combine

p.net_descs <- ggarrange(p.netchars, p.edge.dist, p.overlaps, ncol=3, labels=c('b', 'c', 'd'))
## Warning: Removed 66 rows containing missing values (geom_text).

## Warning: Removed 66 rows containing missing values (geom_text).
ggsave(plot=p.net_descs, filename='figs/fig1bcd_network_characteristics.png', width=20, height=6)
ggsave(plot=p.net_descs, filename='figs/fig1bcd_network_characteristics.pdf', width=20, height=6)
ggsave(plot=p.net_descs, filename='figs/fig1bcd_network_characteristics.tiff', width=20, height=6)

Figure 2: Correlation profiles for glycopeptide and macrolide resistances

make_symmetric <- function(data, value.var) {
  mat <- reshape2::dcast(data, AM1 ~ AM2, value.var=value.var)
  rownames(mat) <- mat$AM1
  mat <- mat[, -1]
  return(mat)
}

pretty_melt <- function(data.upper, data.lower, data.upper.value = 'avgcorr', data.lower.value='n_edges') {
  
  data.upper[lower.tri(data.upper)] <- NA
  data.lower[upper.tri(data.lower)] <- NA
  
  data.upper.long <- as_tibble(data.upper, rownames = 'AM1') %>% 
    pivot_longer(!'AM1', names_to='AM2', values_to=data.upper.value)  
  
  data.lower.long <- as_tibble(data.lower, rownames = 'AM1') %>% 
    pivot_longer(!'AM1', names_to='AM2', values_to=data.lower.value)  
  
  return(plyr::rbind.fill(data.upper.long,data.lower.long))
}

averageclasses.all <- tibble()#list(C1 = NA, C2 = NA, host=NA, avgcorr=NA, n_edges=NA))
for (host in names(networks)) {
  G <- networks[[host]]
  ho <- which(host == hosts)
  
  edge.matrix <- as_adjacency_matrix(G,attr = 'corr', sparse = F, type='upper')
  
  gene2class <- G %>% as_tibble() %>% select(name, ResFinder_class) %>%
    separate_rows(ResFinder_class, sep='/')
  
  averagesclasses <- tibble()
  for (amcombi  in combn(unique(gene2class$ResFinder_class), m=2,simplify = F)){
    am.genes <- gene2class[gene2class$ResFinder_class %in% amcombi, ]$name
    
    am.res <- reshape2::melt(edge.matrix[am.genes, am.genes], value.name = 'corr') %>%
      filter(corr > 0) %>%
      mutate(zscore = DescTools::FisherZ(corr)) %>%
      summarise(meanzscore = mean(zscore), n_edges = n()) %>%
      mutate(avgcorr = DescTools::FisherZInv(meanzscore), host=host, host_order=ho, AM1 = amcombi[1], AM2=amcombi[2])
    
    averagesclasses <- rbind(averagesclasses, am.res)
  
  }
  
  averageclasses <- rbind(averagesclasses, averagesclasses %>% rename(AM2 = AM1, AM1=AM2))
  
  avgcorr.mat <- make_symmetric(averageclasses,value.var='avgcorr')
  n_edges.mat <- make_symmetric(averageclasses,value.var='n_edges')
  
  averagedata <- pretty_melt(avgcorr.mat, n_edges.mat) %>%mutate(host=host, ho=ho)
  averageclasses.all <- rbind(averageclasses.all, averagedata)

}

classesAM <- sort(unique(unlist(str_split(unique(resClasses$ResFinder_class), '/'))))
diag_df <- as_tibble(list(host=hosts)) %>% mutate(x=0,xend=length(classesAM)+1,y=0,yend=length(classesAM)+1)

averageclasses.all <- averageclasses.all %>%
  filter(!(is.na(avgcorr) & is.na(n_edges))) %>%
  mutate(
    host = str_to_title(case_when(
      host == 'canis_lupus' ~ 'Dog',
      host == 'homo_sapiens' ~ 'Human',
      TRUE ~ host
        )
      )
  )
am_combis <- as_tibble(t(combn(classesAM, m=2, simplify=T)))
coord_data <- cbind(
  do.call("rbind", replicate(length(hosts), am_combis, simplify=F)), 
  as_tibble(list(host=rep(hosts, each=nrow(am_combis)),host_order=rep(1:length(hosts), each=nrow(am_combis))))
  ) %>%
  rename(Target=V1, y=V2)
# 
coord_data <- rbind(coord_data, rename(coord_data, Target=y, y=Target))

ams <- list('Glycopeptide', 'Macrolide')
filt.coord.data <- coord_data %>% filter(Target %in% ams)  %>%
  mutate(host = str_to_title(case_when(
        host == 'canis_lupus' ~ 'Dog',
        host == 'homo_sapiens' ~ 'Human',
        TRUE ~ host
  )))
dot.data <- rbind(
  filter(averageclasses.all, AM1 %in% ams, !is.na(avgcorr)) %>% rename(Target = AM1, y=AM2),
  filter(averageclasses.all, AM2 %in% ams, !is.na(avgcorr)) %>% rename(Target = AM2, y=AM1)
)  %>%
  mutate(
    host = str_to_title(case_when(
        host == 'canis_lupus' ~ 'Dog',
        host == 'homo_sapiens' ~ 'Human',
        TRUE ~ host
  )))

filt.coord.data %>%
  left_join(dot.data, by=c("Target", "y", "host")) %>%
ggplot() +
  geom_point(aes(x=reorder(host, host_order), y=y),  shape=21, size=4, stroke=.1, fill='white') +
  geom_point(mapping=aes(x=host, y=y, fill=avgcorr), shape=21, size=4, stroke=.1) +
  # geom_point(mapping=aes(x=reorder(host, host_order), y=y), fill='white', shape=21, size=4, stroke=.1) +
  scale_fill_viridis("Average\ncorrelation",direction=-1, limits=c(0.6,1), na.value='white')  +
  facet_wrap(~Target) +
  theme_light() +
  theme(axis.title=element_blank(), axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))

ggsave(filename=paste0('figs/fig2_sel_cia_ams.png'), width=6*length(ams), height=6)
ggsave(filename=paste0('figs/fig2_sel_cia_ams.pdf'), width=6*length(ams), height=6)

Figure 3: Correation profiles for pleuromutilin and tetracycline resistances

ams <- list('Tetracycline', 'Pleuromutilin')
filt.coord.data <- coord_data %>% filter(Target %in% ams)  %>%
  mutate(host = str_to_title(case_when(
        host == 'canis_lupus' ~ 'Dog',
        host == 'homo_sapiens' ~ 'Human',
        TRUE ~ host
  )))
dot.data <- rbind(
  filter(averageclasses.all, AM1 %in% ams, !is.na(avgcorr)) %>% rename(Target = AM1, y=AM2),
  filter(averageclasses.all, AM2 %in% ams, !is.na(avgcorr)) %>% rename(Target = AM2, y=AM1)
)  %>%
  mutate(
    host = str_to_title(case_when(
        host == 'canis_lupus' ~ 'Dog',
        host == 'homo_sapiens' ~ 'Human',
        TRUE ~ host
  )))

filt.coord.data %>%
  left_join(dot.data, by=c("Target", "y", "host")) %>%
ggplot() +
  geom_point(aes(x=reorder(host, host_order), y=y),  shape=21, size=4, stroke=.1, fill='white') +
  geom_point(mapping=aes(x=host, y=y, fill=avgcorr), shape=21, size=4, stroke=.1) +
  # geom_point(mapping=aes(x=reorder(host, host_order), y=y), fill='white', shape=21, size=4, stroke=.1) +
  scale_fill_viridis("Average\ncorrelation",direction=-1, limits=c(0.6,1), na.value='white')  +
  facet_wrap(~Target) +
  theme_light() +
  theme(axis.title=element_blank(), axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))

ggsave(filename=paste0('figs/fig3_sel_ia_ams.png'), width=6*length(ams), height=6)
ggsave(filename=paste0('figs/fig3_sel_ia_ams.pdf'), width=6*length(ams), height=6)

Supplementary figures

Figure S1: Metagenomic origins

Define colors for the various sampling sources

host_colors_palette <- tibble(host=c(hosts, 'other'), hostOrder=1:12, color=c(pals::tol(n=11), 'grey')) %>%
  mutate(
    hostGroup = str_to_title(case_when(
      host == 'canis_lupus' ~ 'Dog',
      host == 'homo_sapiens' ~ 'Human',
      TRUE ~ host))
)
host_colors = setNames(host_colors_palette$color, host_colors_palette$hostGroup)
host_col_scale <- scale_colour_manual(name = "Sampling source", values = host_colors, limits = force)
host_fill_scale <- scale_fill_manual(name = "Sampling source", values = host_colors, limits = force)

Create sampling year plot

p.meta.year <- meta.data %>%
  mutate(year=replace_na(year, 'Unknown'), hostGroup=replace_na(hostGroup, 'other')) %>%
  group_by(hostGroup, year) %>%
  summarise(n_runs=n_distinct(run_accession)) %>%
  ungroup() %>%
  mutate(
    hostOrder = replace_na(mapply(get_host_order, hostGroup), length(hosts)+1),
    hostGroup = str_to_title(case_when(
      hostGroup == 'canis_lupus' ~ 'Dog',
      hostGroup == 'homo_sapiens' ~ 'Human',
      TRUE ~ hostGroup))
  ) %>%
  ggplot() +
  geom_col(aes(y=year, x=n_runs, fill=reorder(hostGroup, hostOrder))) +
  host_fill_scale +
  ylim(seq(2000, 2020, 1), 'Unknown') +
  xlab('Sample count') +
  ylab('Sampling year') +
  theme_light()
## `summarise()` has grouped output by 'hostGroup'. You can override using the `.groups` argument.
meta.data %>%
  filter(is.na(year)) %>%
  dim()
## [1] 84238    13

Create overview of sequencing platforms

p.meta.platforms <- meta.data %>%
  mutate(hostGroup = str_to_title(case_when(
      hostGroup == 'canis_lupus' ~ 'Dog',
      hostGroup == 'homo_sapiens' ~ 'Human',
      is.na(hostGroup) ~ 'Other',
      TRUE ~ hostGroup))) %>%
  ggplot() +
  geom_col(aes(x=raw_reads, y=instrument_platform, fill=hostGroup)) + 
  host_fill_scale +
  facet_wrap(instrument_platform ~ ., scales='free', ncol=1) +
  theme_light() +
  theme(strip.text = element_blank(), strip.background = element_blank()) +
  xlab('Count of raw sequencing reads') + 
  ylab('Instrument platform') 

Create overview of sampling locations

library(maps)
map <- map_data('world')

gps.runs.host <- meta.data %>%
  mutate(hostGroup = str_to_title(case_when(
      hostGroup == 'canis_lupus' ~ 'Dog',
      hostGroup == 'homo_sapiens' ~ 'Human',
      is.na(hostGroup) ~ 'Other',
      TRUE ~ hostGroup))) %>%
  group_by(latitude, longitude, hostGroup) %>%
  summarise(n_runs=n_distinct(run_accession))

gps.runs <- meta.data %>%
  group_by(latitude, longitude) %>%
  summarise(n_runs=n_distinct(run_accession)) %>%
  mutate(hostGroup = 'all')

gps.runs.combi <- rbind(gps.runs, gps.runs.host) %>%
  mutate(
    hostOrder = mapply(get_host_order, hostGroup),
    hostGroup = str_to_title(case_when(
      hostGroup == 'canis_lupus' ~ 'Dog',
      hostGroup == 'homo_sapiens' ~ 'Human',
      TRUE ~ hostGroup))
  )

p.meta.map <- ggplot() +
  geom_polygon(data=map, aes(x=long, y = lat, group = group), fill="grey", alpha=0.3) +
  geom_point(data=gps.runs.host , aes(x=longitude, y=latitude, size=n_runs, color=hostGroup), alpha=.5) + 
  host_col_scale +
  scale_size('Number of samples', range=c(.5, 5)) +
  theme_void() +
  theme(strip.text.x = element_text(size=14)) +
  guides(color=F)

rbind(
  meta.data %>%
    filter(is.na(latitude), is.na(longitude))%>%
    summarise(n=n_distinct(run_accession)) %>%
    mutate(hostGroup='all'),
  meta.data %>%
    group_by(hostGroup) %>%
    filter(is.na(latitude), is.na(longitude))%>%
    summarise(n=n_distinct(run_accession))
)
## # A tibble: 12 × 2
##        n hostGroup   
##    <int> <chr>       
##  1 83361 all         
##  2    16 air         
##  3  3159 canis_lupus 
##  4   570 chicken     
##  5   262 cow         
##  6    61 freshwater  
##  7 40003 homo_sapiens
##  8   363 marine      
##  9  2842 mouse       
## 10   320 pig         
## 11   477 soil        
## 12 35288 <NA>

Combine into one figure

ggarrange(
  ggarrange(p.meta.year, p.meta.platforms, common.legend = T, legend = 'right', labels=c('a.', 'b.')), 
  p.meta.map, common.legend=F, ncol=1, labels=c('', 'c.'),heights = c(.75, 1))
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning: Removed 15 rows containing missing values (position_stack).
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning: Removed 15 rows containing missing values (position_stack).
## Warning: Removed 21 rows containing missing values (geom_point).

ggsave(filename='figs/figS1_run_metadata.png', width=14, height=10)
ggsave(filename='figs/figS1_run_metadata.pdf', width=14, height=10)
ggsave(filename='figs/figS1_run_metadata.tiff', width=14, height=10)

Figure S2: Top 10 most abundant ARGs per sampling group

# Genes with most fragments in the various environmental and host sources
gene.rels <- gene.counts %>%
  filter(!is.na(hostGroup)) %>%
  group_by(hostGroup) %>%
  summarise(sumVar=sum(fragmentCountAln)) %>%
  left_join(gene.counts, by=c("hostGroup")) %>% 
  mutate(
    fragClo = (100 / sumVar) * fragmentCountAln
  )

gene.rels.top10 <- gene.rels %>%
  group_by(hostGroup) %>%
  top_n(wt=fragClo, n=10) %>%
  left_join(resClasses, by=c("refSequence"="name")) %>%
  mutate(
    refSequence = sub('_[^_]+$', '', refSequence),
    hostOrder = mapply(get_host_order, hostGroup),
    hostGroup2 = str_to_title(case_when(
      hostGroup == 'canis_lupus' ~ 'Dog',
      hostGroup == 'homo_sapiens' ~ 'Human',
      TRUE ~ hostGroup))
  )

(p.gene.rels <- ggplot(data=gene.rels.top10) + 
  geom_bar(aes(x=refSequence, y=fragClo, fill=ResFinder_class), stat='identity') + 
  facet_wrap(~fct_reorder(hostGroup2, hostOrder), scales='free_x', ncol=3) + 
  theme_light() + 
  theme(axis.text.x = element_text(angle=90)) +
  scale_fill_manual(values=classes_colors, guide="none") +
  xlab('ARG') + ylab('Relative abundance (%)') +
  ylim(-5,100)
)

# (p.rels.combined <- ggarrange(p.class.rels, p.gene.rels, ncol = 1, nrow=2, labels = 'auto', heights = c(0.75, 1)))
ggsave('figs/figS2_relative_gene_abundances.pdf', plot = p.gene.rels, width=15, height=15)
ggsave('figs/figS2_relative_gene_abundances.png', plot = p.gene.rels, width=15, height=15)
ggsave('figs/figS2_relative_gene_abundances.eps', plot = p.gene.rels, width=15, height=15)

Figure S3: Distribution of correlations and p-values

all.p.edges <- list()
for (host in hosts) {
  
  hostMat <- all.matrices[[host]]
  if (host == 'canis_lupus') {
    host = 'dog'
  } else if (host == 'homo_sapiens') {
    host = 'human'
  }
  h <- str_to_title(host)
  
  p.edges <- hostMat %>%
    mutate(
      Selected = case_when(
        pd < alpha & corr >= min_corr ~ TRUE,
        TRUE ~ FALSE
      )
    ) %>%
  ggplot() +
    geom_point(aes(x=corr, y=pd, color=Selected), alpha=.5, size=.5) +
    scale_color_manual(values=c("FALSE" = 'red', "TRUE" = 'blue')) +
    theme_light() +
    ylab('One-tailed p-value') +
    xlab('Correlation') +
    ggtitle(h)
  
   all.p.edges <- c(all.p.edges, setNames(list(p.edges), host))
}

(p.all.edges <- ggarrange(plotlist = all.p.edges, common.legend = T, ncol = 3, nrow=4))

ggsave(plot = p.all.edges, filename = 'figs/figS3_all_edges_dist.png', width=10, height=10)
ggsave(plot = p.all.edges, filename = 'figs/figS3_all_edges_dist.pdf', width=10, height=10)
ggsave(plot = p.all.edges, filename = 'figs/figS3_all_edges_dist.eps', width=10, height=10)

Figure S4: The relative abundance, number of ARGs, and number of correlations for each resistance class

corrsPerClass <- tibble()
for (host in names(networks)) {
  G <- networks[[host]]
  
  if(host != 'all') {
    c.data <- all.data[all.data$hostGroup == host,]
  } else {
    c.data = all.data
  }
  
  sum.arg <- sum(c.data$fragmentCountAln, na.rm=T)
  
  ho = which(host == hosts)
    
  for (resClass in unique(resClasses$ResFinder_class)) {
    genes <- resClasses[resClasses$ResFinder_class == resClass, ]$name
    m <- igraph::as_adjacency_matrix(G, sparse=F,type = 'upper')
    
    corrsPerClass <- rbind(
      corrsPerClass,
      tibble(n=sum(m[colnames(m)[colnames(m) %in% genes],])+sum(m[,rownames(m)[rownames(m) %in% genes]]), host=host, resClass=resClass, host_order=ho)
    )
    
  }
}

corrsPerClass[corrsPerClass$n == 0, 'n'] <- NA


facet_labels = c(`n` = 'Number of correlations', 
  `fragClosed` = 'Relative abundance (%)', 
  `air` = 'Air', 
  `all` = 'All', 
  `chicken` = 'Chicken',
  `cow` = 'Cow',
  `canis_lupus` = 'Dog',
  `marine` = 'Marine',
  `homo_sapiens` = 'Human',
  `mouse` = 'Mouse',
  `freshwater` = 'Freshwater',
  `soil`  = 'Soil',
  `pig`  = 'Pig',
  `nGeneClass` = 'Number of ARGs in class'
)

rbind(
  select(sum.gene.counts2,hostOrder, hostGroup, ResFinder_class, fragClosed, nGeneClass) %>%
  pivot_longer(!c(hostOrder, hostGroup, ResFinder_class)) %>%
  rename(host=hostGroup, host_order=hostOrder, resClass=ResFinder_class) %>%
  mutate(name_order=case_when(
    name == 'fragClosed' ~ 1,
    name == 'nGeneClass' ~ 2,

  )),
corrsPerClass %>%
  pivot_longer(!c(host, host_order, resClass)) %>%mutate(name_order=3)
) %>%
  ggplot(aes(x=value, y=resClass, fill=name)) +
  geom_bar(stat='identity') +
  scale_fill_manual(guide="none", values=list("n" = "#E69F00", "fragClosed"="#009E73", "nGeneClass" = '#999999')) +
  facet_wrap(fct_reorder(host, host_order)~fct_reorder(name, name_order), scales='free_x',ncol = 6, labeller=as_labeller(facet_labels)) +
  xlab('') + ylab('') +
  theme_light() 
## Warning: Removed 162 rows containing missing values (position_stack).

ggsave('figs/figS4_classes_rel_corrs.png', width=30, height=40)
## Warning: Removed 162 rows containing missing values (position_stack).
ggsave('figs/figS4_classes_rel_corrs.pdf', width=30, height=40)
## Warning: Removed 162 rows containing missing values (position_stack).
ggsave('figs/figS4_classes_rel_corrs.tiff', width=30, height=40)
## Warning: Removed 162 rows containing missing values (position_stack).

Figure S5: Networks illustrating how one ARG interacts differently depending on the sampling environments

gene.regexes <- c("catA1", "mef\\(A\\)_1", "tet\\(L\\)_4")
#all.gene.graphplots <- list()
gene.graphplots <- list()
for (gene.regex in gene.regexes) {
  
  add_legend = length(gene.regexes) == which(gene.regex == gene.regexes)
  for (host in hosts) {
    G <- networks[[host]]
    G.sel <- extract_neighborhood(gene.regex, G)
    if (is.null(G.sel)) {
      p.h <- ggplot() + ggtitle(str_to_title(host)) + theme_void() + theme_graph(base_family="sans") + annotate("text",label="No correlations", x=0, y=0)
    } else {
      
      p.h <- G.sel %>%
        activate(nodes) %>%
        left_join(filter(gene.rels, hostGroup == host), by=c("name" = "refSequence")) %>%
        mutate(name=sub('_[^_]+$', '', name)) %>%
        ggraph(layout="nicely") +
        geom_edge_link0(aes(color=corr, width=corr, alpha=corr)) +
        geom_node_point(aes(color=ResFinder_class, size=fragClo, shape=sel))  +
        geom_node_label(aes(label=name), size=2, repel = T) +
        scale_edge_width(range=c(.2, 1), guide="none") +
        scale_edge_alpha(range=c(0.5, 0.9), guide="none") +
        scale_edge_color_viridis("Correlation",direction=-1,limits=c(0.6, 1)) +
        scale_size_continuous('Relative abundance', range=c(1, 4),  breaks=c(5, 10, 25, 50, 75, 100)) +
        scale_color_manual("ResFinder class", values=classes_colors) +
        scale_shape_manual("Highlighted", values=list("Yes" = 16, "No" = 15))+
        ggtitle(str_to_title(host)) +
        theme_graph(base_family="sans") 
      
      if(!add_legend){
        p.h <- p.h + guides(colour="none", shape="none", size="none", edge_color="none")
      }
      # if (add_legend) {
      #   p.h <- p.h + 
      #     scale_color_manual("ResFinder class", values=classes_colors) +
      #     scale_edge_color_viridis("Correlation",direction=-1,limits=c(0.6, 1)) +
      #     scale_size('Relative abundance', range=c(1, 4)) +
      #     scale_shape_manual("Highlighted", values=list("Yes" = 16, "No" = 15))  
      # 
      # } else {
      #   p.h <- p.h + 
      #     scale_color_manual(guide="none", values=classes_colors) +
      #     scale_edge_color_viridis(guide="none",direction=-1,limits=c(0.6, 1)) +
      #     scale_shape_manual(guide="none", values=list("Yes" = 16, "No" = 15))  +
      #     scale_size(guide="none", range=c(1, 4)) 

      # }
    }
      gene.graphplots <- c(gene.graphplots, list(p.h))
  }
  #print(paste(gene.regex, length(gene.graphplots)))
  #p.g <- ggarrange(plotlist=gene.graphplots, nrow=1, common.legend = T, legend="bottom")
  #all.gene.graphplots <- c(all.gene.graphplots, list(p.g))
}

 p.gp.all <- ggarrange(
  plotlist=gene.graphplots,
  ncol=11, nrow=length(gene.regexes), 
  common.legend = T, 
  labels = c("a. catA1_1", rep("", 10), "b. mef(A)_1", rep("", 10), "c. tet(L)_4", rep("", 10)), 
  font.label = list(size = 18, color = "black", face = "bold", family = NULL)
)

ggsave(plot=p.gp.all, filename = "figs/figS5_network_sel.png", width=30,height=15)
ggsave(plot=p.gp.all, filename = "figs/figS5_network_sel.eps", width=30,height=15, family="sans")
ggsave(plot=p.gp.all, filename = "figs/figS5_network_sel.pdf", width=30,height=15)

Figure S6: Average class correlations

ggplot(mapping=aes(x=AM1, y=AM2)) +
  geom_tile(data=averageclasses.all, mapping=aes(fill=avgcorr)) +
  scale_fill_viridis("Average\ncorrelation",direction=-1, limits=c(0.6,1), na.value=NA) +
  geom_segment(data=diag_df, mapping=aes(x=x, xend=xend, y=y, yend=yend), color="grey") +
  new_scale_fill() +
  geom_tile(mapping=aes(fill=n_edges), data=averageclasses.all)+
  scale_fill_viridis("Number of \ncorrelation\ncoefficients",direction=-1, na.value=NA, option = 'B', breaks=c(1,3,5,10,50,100,200,337),trans="log")+  
  facet_wrap(~fct_reorder(host, ho)) +
  theme_light() +
  theme(axis.title=element_blank(), axis.text.x=element_text(angle=90, hjust=1, vjust=0.5))

ggsave('figs/figS6_network_avg_corrs_heatmap.png', width=20, height=15)
ggsave('figs/figS6_network_avg_corrs_heatmap.pdf', width=20, height=15)
ggsave('figs/figS6_network_avg_corrs_heatmap.tiff', width=20, height=15)

Figure S7: Highlights of interactions between ARGs of glycopeptide and macrolide resistances (CIA)

net.titles.host <- c("mouse" = "Mouse", "homo_sapiens" = "Human", "freshwater" = "Freshwater", "pig" = "Pig", "cow" = "Cow", "chicken" = "Chicken")

sel.nets.classes <- list(
  "glycopeptide_highlights" = c("homo_sapiens" = "Glycopeptide", "pig"= "Glycopeptide"),
  "macrolide_highlights" = c("homo_sapiens" = "Macrolide", "pig" = "Macrolide", "cow" = "Macrolide", "chicken" = "Macrolide")
  )

all.sets <- list()

for (fname in names(sel.nets.classes)) {
  nets.highlights <- list()
  node_col_scale = scale_color_manual(guide="none", values=classes_colors) 

  for(host in sort(names(sel.nets.classes[[fname]]))){
    am = sel.nets.classes[[fname]][[host]]
    G <- networks[[host]]
  
    # define layout
    e <- get.edgelist(G, names=F)
    layout <- qgraph.layout.fruchtermanreingold(e, vcount = vcount(G),
                                                area = 6*(vcount(G)^2), repulse.rad = vcount(G)^2.5)
  
    node.sel <- as_tibble(G) %>% rownames_to_column() %>% filter(str_detect(ResFinder_class, am))
    node.ids <- as.numeric(node.sel$rowname)
    G2 <- G %>%
      activate(nodes) %>%
      mutate(x=layout[,1], y=layout[,2])
  
    neighborhoods <- c()
  
    for (node.id in node.ids) {
      neighborhood <- G %>%
          convert(to_local_neighborhood, node=node.id, order=1, mode="all") %>%
          activate(edges) %>%
          mutate(highlight = T) %>%
        activate(nodes) %>%
          mutate(highlight = T)
      
      G2 <- G2 %>% bind_edges(as_data_frame(neighborhood, what="edges"))
      
      neighborhoods <- c(neighborhoods, V(neighborhood)$name) 
    }
  
    G2 %>% 
      activate(nodes) %>%
      mutate(
        focus_node = name %in% node.sel$name
      )
    
    p.neighborhoods <- G2 %>%
      activate(nodes) %>%
      mutate(
        highlight = name %in% neighborhoods, 
        gene = sub('_[^_]+$', '', name),
        focus_node = case_when(
          name %in% node.sel$name ~1,
          T ~ 0
        )
      )  %>%
      ggraph(layout="manual", x=x, y=y) +
      geom_edge_link(aes(filter=is.na(highlight)), color="gray", width=.5, alpha=.25) +
      geom_edge_link(aes(filter=!is.na(highlight), color=corr), width=.5, alpha=.5) +
      geom_node_point(aes(color=ResFinder_class, filter=highlight == T, size=focus_node)) +
      geom_node_text(aes(label=gene, filter=highlight == T, color=ResFinder_class), repel=T, size=4) +
      scale_size(breaks=1, labels=c(paste('Confers resistance\nto', am)), range=c(1,3),name = '') +
      # geom_node_label(aes(label=name), size=2, repel = T)
      # scale_edge_color_viridis("Correlation",direction=-1, limits=c(0.6, 1)) +
      scale_edge_color_viridis(guide="none",direction=-1, limits=c(0.6, 1)) +
      node_col_scale +
      ggtitle(net.titles.host[[host]]) +
      theme_graph(base_family="sans") +
      theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5, size = 14))
    
     nets.highlights <- c(nets.highlights, list(p.neighborhoods))
  }

  p.sel<-ggarrange(plotlist=nets.highlights, ncol=2,nrow=length(sel.nets.classes[[fname]])/2, common.legend = T, legend="right")
  ggsave(filename=paste0('figs/',fname, '.png'), height=7*length(sel.nets.classes[[fname]])/2, width=5*length(sel.nets.classes[[fname]]))
  
  all.sets <- c(all.sets, list(p.sel))
}
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
p.cia <- ggarrange(plotlist=all.sets, ncol=1, labels = c('a. Glycopeptide', 'b. Macrolide'), heights = c(1, 2), font.label = list(size=16))
ggsave(plot = p.cia, filename='figs/figS7_cia_highlights.png', width=14, height=21)
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ggsave(plot = p.cia, filename='figs/figS7_cia_highlights.pdf', width=14, height=21)
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ggsave(plot = p.cia, filename='figs/figS7_cia_highlights.tiff', width=14, height=21)
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Figure S8: Highlights of interactions between ARGs of pleuromutilin and tetracycline resistances (HIA/IA)

asel.nets.classes <- list(
  "pleuromutiline_highlights" =c("homo_sapiens" = "Pleuromutilin", "pig" = "Pleuromutilin", "cow" = "Pleuromutilin", "chicken" = "Pleuromutilin"),
  "tetracycline_highlights" = c("homo_sapiens" = "Tetracycline", "pig" = "Tetracycline", "cow" = "Tetracycline", "chicken" = "Tetracycline")
  )

all.sets <- list()

for (fname in names(sel.nets.classes)) {
  nets.highlights <- list()
  node_col_scale = scale_color_manual(guide="none", values=classes_colors) 

  for(host in sort(names(sel.nets.classes[[fname]]))){
    am = sel.nets.classes[[fname]][[host]]
    G <- networks[[host]]
  
    # define layout
    e <- get.edgelist(G, names=F)
    layout <- qgraph.layout.fruchtermanreingold(e, vcount = vcount(G),
                                                area = 6*(vcount(G)^2), repulse.rad = vcount(G)^2.5)
  
    node.sel <- as_tibble(G) %>% rownames_to_column() %>% filter(str_detect(ResFinder_class, am))
    node.ids <- as.numeric(node.sel$rowname)
    G2 <- G %>%
      activate(nodes) %>%
      mutate(x=layout[,1], y=layout[,2])
  
    neighborhoods <- c()
  
    for (node.id in node.ids) {
      neighborhood <- G %>%
          convert(to_local_neighborhood, node=node.id, order=1, mode="all") %>%
          activate(edges) %>%
          mutate(highlight = T) %>%
        activate(nodes) %>%
          mutate(highlight = T)
      
      G2 <- G2 %>% bind_edges(as_data_frame(neighborhood, what="edges"))
      
      neighborhoods <- c(neighborhoods, V(neighborhood)$name) 
    }
  
    G2 %>% 
      activate(nodes) %>%
      mutate(
        focus_node = name %in% node.sel$name
      )
    
    p.neighborhoods <- G2 %>%
      activate(nodes) %>%
      mutate(
        highlight = name %in% neighborhoods, 
        gene = sub('_[^_]+$', '', name),
        focus_node = case_when(
          name %in% node.sel$name ~1,
          T ~ 0
        )
      )  %>%
      ggraph(layout="manual", x=x, y=y) +
      geom_edge_link(aes(filter=is.na(highlight)), color="gray", width=.5, alpha=.25) +
      geom_edge_link(aes(filter=!is.na(highlight), color=corr), width=.5, alpha=.5) +
      geom_node_point(aes(color=ResFinder_class, filter=highlight == T, size=focus_node)) +
      geom_node_text(aes(label=gene, filter=highlight == T, color=ResFinder_class), repel=T, size=4) +
      scale_size(breaks=1, labels=c(paste('Confers resistance\nto', am)), range=c(1,3),name = '') +
      # geom_node_label(aes(label=name), size=2, repel = T)
      # scale_edge_color_viridis("Correlation",direction=-1, limits=c(0.6, 1)) +
      scale_edge_color_viridis(guide="none",direction=-1, limits=c(0.6, 1)) +
      node_col_scale +
      ggtitle(net.titles.host[[host]]) +
      theme_graph(base_family="sans") +
      theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5, size = 14))
    
     nets.highlights <- c(nets.highlights, list(p.neighborhoods))
  }

  p.sel<-ggarrange(plotlist=nets.highlights, ncol=2,nrow=length(sel.nets.classes[[fname]])/2, common.legend = T, legend="right")
  ggsave(filename=paste0('figs/',fname, '.png'), height=7*length(sel.nets.classes[[fname]])/2, width=5*length(sel.nets.classes[[fname]]))
  
  all.sets <- c(all.sets, list(p.sel))
}
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
p.hia <- ggarrange(plotlist=all.sets, ncol=1, labels = c('a. Pleuromutilin', 'b. Tetracycline'),font.label = list(size=16))
ggsave(plot = p.hia,filename='figs/figS8_ia_highlights.png', width=14, height=21)
## Warning: ggrepel: 8 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ggsave(plot = p.hia,filename='figs/figS8_ia_highlights.pdf', width=14, height=21)
## Warning: ggrepel: 8 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ggsave(plot = p.hia,filename='figs/figS8_ia_highlights.tiff', width=14, height=21)
## Warning: ggrepel: 8 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps